## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
##
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
##
## boundary
## Loading required package: urca
## Loading required package: lmtest
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
So far, all of the neuroscientific evidence in favor of the CMC comes from a series of Dynamic Causal Modeling (DCM) analyses of task-based fMRI. The results are certainly convincing, but:
We need converging evidence from another method, to make sure that the only method used so far (DCM) is not intrinsically biased in favor of a CMC-like architecture. Using a converging method is actually Goal #2 in the AFOSR grant.
Although DCM is an elegant tool, it has its fair share of detractors, and two acknowledged downsides.
2.1. It relies on many assumptions. For instance, DCM also depends on how task events are modeled and how they are assumed to drive neural activity.
2.2. DCM can only be used in a top-down way, examining different models that need to be chosen beforehand. This is in contrast to the most common bottom-up approach in the neurosciences, in which the brain’s architecture is inferred from an analysis of the data.
So, we need a method to estimate effective (i.e., directed) connectivity from data.
Let’s see if we can recover the CMC from it.
# Regions of interest (VOIs) and the HCP tasks analyzed throughout.
VOIs <- c("Action", "LTM", "Perception", "Procedural", "WM")

TASKS <- c("Gambling", "Relational", "Social", "WM", "Language", "Emotion")

# Number of ROIs
R <- length(VOIs)
First, we need to perform GC analysis on all data.
# Run a Granger-causality (VAR) analysis on every subject's ROI timeseries,
# for every task, collecting lag-wise coefficient estimates and p-values
# into one long data frame.
#
# Improvement: accumulate per-region results in a list and bind once at the
# end; growing a data.frame with rbind() inside a nested loop is O(n^2).
all_rows <- list()
for (task in TASKS) {
  # Subject directories are named "sub-*" inside each task directory.
  subjects <- dir(task)[grep("sub-", dir(task))]
  for (sub in subjects) {
    M <- read_tsv(paste(task, sub, "cmc.txt", sep="/"),
                  col_names = VOIs,
                  col_types = cols(
                    Action = col_double(),
                    LTM = col_double(),
                    Perception = col_double(),
                    Procedural = col_double(),
                    WM = col_double()
                  ))
    # Select the best lag, using BIC ("Schwarz Criterion"); it is the third
    # entry of VARselect()'s $selection vector.
    lag <- VARselect(M)$selection[3]
    gm <- VAR(M, type="none", p = lag)
    coef <- coefficients(gm)
    for (v in seq_len(R)) {
      voi <- VOIs[v]
      # Column 1 holds the estimates, column 4 the p-values; the first
      # R * lag rows are the lagged ROI terms.
      Estimates <- coef[[v]][,1][1 : (R * lag)]
      Pvalues <- coef[[v]][,4][1 : (R * lag)]
      all_rows[[length(all_rows) + 1]] <-
        data.frame(Task = rep(task, R * lag),
                   Subject = rep(sub, R * lag),
                   To = rep(voi, R * lag),
                   From = rep(VOIs, lag),
                   Lag = sort(rep(1:lag, R)),
                   Estimate = Estimates,
                   p = Pvalues)
    }
  }
}
df <- do.call(rbind, all_rows)
granger_data_complete <- as_tibble(df)
Now, because we have different Lags, we are going to pick the smallest p value for each region across all lags. We are also going to translate these p-values into binary connections, stored in the “Link” variable. If a connection has a p-value < 0.05, it is considered an effective connection and part of that participant’s architecture.
# Collapse over lags: for each (Task, Subject, To, From) connection keep the
# smallest p-value across lags and the mean estimate. A connection with
# p < 0.05 is marked as a binary Link in that participant's architecture.
granger_data <- granger_data_complete %>%
  group_by(Task, Subject, To, From) %>%
  # .groups made explicit (same as the default drop_last); silences the
  # "`summarise()` has grouped output" message.
  summarise(p = min(p),
            Estimate = mean(Estimate),
            .groups = "drop_last") %>%
  mutate(Link = if_else(p < 0.05, 1, 0))
## `summarise()` has grouped output by 'Task', 'Subject', 'To'. You can override using the `.groups` argument.
Here is an example of the four timeseries
# Long-format view of the last-loaded subject's timeseries (M): one row per
# (Time, Region) pair, plotted as one line per region, faceted by region.
data <- M %>%
  add_column(Time = seq_len(nrow(M))) %>%
  pivot_longer(cols = c(Action, LTM, Perception, Procedural, WM),
               names_to = "Region",
               values_to = "BOLD")

ggplot(data, aes(x = Time, y = BOLD, col = Region)) +
  geom_line() +
  facet_wrap(~ Region) +
  scale_color_brewer(palette = "Set2") +
  theme_pander()
And here are the p values associated with each connection, in each participant, for each task.
# One heatmap per task: connection p-values (From -> To), one facet per
# participant.
# Fixes: (1) the first ggtitle("Probability of Connection") was dead code,
# immediately overridden by ggtitle(task); (2) theme() must come AFTER
# theme_pander(), otherwise the complete theme resets the axis-text rotation.
for (task in TASKS) {
  p <- ggplot(filter(granger_data,
                     Task == task),
              aes(x=From, y=To)) +
    geom_tile(aes(fill = p), col="white") +
    scale_fill_viridis_c(option="inferno") +
    coord_equal(ratio = 1) +
    facet_wrap(~ Subject) +
    ggtitle(task) +
    theme_pander() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
  print(p)
}
# ggplot(granger_data, aes(x=From, y=To)) +
# geom_tile(aes(fill=Link), col="white") +
# scale_fill_viridis_c(option="inferno", end = 0.8) +
# ggtitle("Inferred Architecture") +
# facet_wrap(Subject ~ Task ) +
# coord_equal(ratio = 1) +
# theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
# theme_pander()
Now, we can look at the distribution of lags across tasks and participants:
# Distribution of the selected VAR lag across tasks and participants.
lags <- granger_data_complete %>%
  group_by(Task, Subject) %>%
  # .groups made explicit (same as the default); silences the message.
  summarise(Lag = max(Lag),
            .groups = "drop_last")

ggplot(lags, aes(x=Lag, fill=Task)) +
  geom_bar(position = "dodge",
           col="white",
           width=0.75) +
  scale_fill_jama() +
  theme_pander()
To infer an architecture from a distribution of p-values, we need a few statistical functions. Here are the ones we will use.
# Statistical mode of a vector of positive integer codes: the most frequent
# value. Ties resolve to the smallest code.
Mode <- function(codes) {
  counts <- tabulate(codes)
  which.max(counts)
}
# Fisher's method for combining independent p-values: under the global null,
# -2 * sum(log(p)) follows a chi-square distribution with 2k degrees of
# freedom. Returns the combined (upper-tail) p-value.
fisher.chi <- function(pvals) {
  statistic <- -2 * sum(log(pvals))
  dof <- 2 * length(pvals)
  1 - pchisq(statistic, df = dof)
}
# Friston's conjunction test: combined p-value is the largest individual
# p-value raised to the number of tests.
friston.test <- function(pvals) {
  k <- length(pvals)
  max(pvals)^k
}
# Nichols' minimum-statistic conjunction: a conjunction is significant only
# if every test is, so the combined p-value is the worst (largest) p-value.
nichols.test <- function(pvals) {
  worst <- max(pvals)
  worst
}
# Tippett's method: combined p-value based on the smallest of the k
# p-values; under the global null, P(min p <= s) = 1 - (1 - s)^k.
tippet.test <- function(pvals) {
  smallest <- min(pvals)
  1 - (1 - smallest)^length(pvals)
}
# One-sided exact binomial test on a vector of 0/1 link indicators: p-value
# for observing at most sum(binvals) links out of length(binvals) subjects
# under a chance rate of 0.5 (alternative = "less").
binom.test.cmc <- function(binvals) {
  successes <- sum(binvals)
  trials <- length(binvals)
  binom.test(successes, n = trials, alternative = "less")$p.value
}
And now we can aggregate the data by task, using the functions above.
# Aggregate connections at the task level using the functions above, and
# derive two binary links:
#  * Link:        the binomial test's "less" tail is > 0.95, i.e. the
#                 connection appears in significantly MORE subjects than chance
#  * FristonLink: Friston conjunction p < 0.05
group_data <- granger_data %>%
  group_by(Task, To, From) %>%
  # summarise (not summarize) for consistency with the rest of the file;
  # .groups made explicit to silence the message.
  summarise(p.nichols = nichols.test(p),
            p.friston = friston.test(p),
            Plink = binom.test.cmc(Link),
            Sign = sum(2*Link - 1),
            Weight = mean(Estimate),
            .groups = "drop_last") %>%
  mutate(Link = if_else(Plink > 0.95, 1, 0),
         FristonLink = if_else(p.friston < 0.05, 1, 0))
## `summarise()` has grouped output by 'Task', 'To'. You can override using the `.groups` argument.
Here are the task-level functional architectures.
# Task-level architectures: probability of each connection (Plink).
# Fix: custom theme() tweaks must come AFTER theme_pander(); the complete
# theme was previously resetting the axis-text rotation.
ggplot(group_data,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Plink), col="white") +
  labs(fill="Probability") +
  facet_wrap(~ Task) +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle("Task-Specific Architectures") +
  coord_equal(ratio = 1) +
  theme_pander() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "bottom")
# Binarized task-level architectures.
# Fixes: (1) fill label was "Probability" but Link is a 0/1 indicator;
# (2) theme() moved after theme_pander() so the rotation survives.
ggplot(group_data,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Link), col="white") +
  labs(fill="Link") +
  facet_wrap(~ Task) +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle("Task-Specific Architectures") +
  coord_equal(ratio = 1) +
  theme_pander() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "bottom")
Now, let’s calculate an architecture across all tasks. We will use majority rule: If a connection appears in at least half of the tasks, it will be considered part of the architecture.
# Domain-general architecture via majority rule: a connection is part of the
# architecture if it appears in at least half of the tasks. Generalized from
# the hard-coded ">= 3" to ">= n() / 2" (identical for the 6 tasks here, but
# robust if tasks are added or removed). PFisher combines the complements of
# the task-level link probabilities with Fisher's method.
architecture <- group_data %>%
  group_by(To, From) %>%
  summarise(Link = if_else(sum(Link) >= n() / 2, 1, 0),
            PFisher = 1 - fisher.chi(1 - Plink),
            .groups = "drop_last") %>%  # explicit; silences the message
  add_column(Connectivity = "Empirical")
## `summarise()` has grouped output by 'To'. You can override using the `.groups` argument.
# Heatmap of the combined (Fisher) connection probabilities.
# Fix: theme() moved after theme_pander() so the axis-text rotation is not
# reset by the complete theme.
ggplot(architecture,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=PFisher), col="white") +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle("Domain-General Architecture") +
  coord_equal(ratio = 1) +
  labs(fill="Probability") +
  theme_pander() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
And now we can compare it to the CMC theory.
# Load the theoretical architectures and set the CMC side by side with the
# empirical one.
architectures <- read_tsv("architectures.tsv",
                          col_types = cols(
                            From = col_character(),
                            To = col_character(),
                            Link = col_double()
                          ))

cmc <- architectures %>%
  filter(Model == "CMC") %>%
  add_column(Connectivity = "CMC Theory")

# Binarize the empirical architecture at a very conservative threshold.
archi <- architecture %>%
  dplyr::select(To, From, PFisher, Connectivity) %>%
  mutate(Link = if_else(PFisher > 0.999, 1, 0)) %>%
  dplyr::select(To, From, Link, Connectivity)

# Connectivity differs between the two tables ("Empirical" vs "CMC Theory"),
# so this join matches no rows and effectively stacks them. Making the join
# columns explicit documents that and silences the "Joining, by =" message.
comparison <- full_join(archi,
                        cmc,
                        by = c("To", "From", "Link", "Connectivity"))
## Joining, by = c("To", "From", "Link", "Connectivity")
# Side-by-side comparison of the empirical and CMC-predicted architectures.
# Fix: theme() moved after theme_pander() so the rotation is not reset.
ggplot(comparison,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Link), col="white") +
  facet_wrap(~ Connectivity) +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  ggtitle("Predicted vs. Observed Architecture") +
  coord_equal(ratio = 1) +
  theme_pander() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
The CMC differs from the empirically observed connectivity only in 3 of the 25 predictions. Assuming that the probability of a connection is 14/25 (the number of connections predicted by the theory), the probability of making 22 correct identifications out of 25 is given by binom.test(22, 25, p = 14/25).
But what about other architectures? Here, we will consider the six competitor architectures examined in the NeuroImage paper. Here is their matrix representation.
# Order the models so that the CMC is plotted last, then show every
# architecture's connectivity matrix.
architectures$Model <- factor(architectures$Model,
                              levels = c("Hub PFC", "Hub Temporal", "Hub Procedural",
                                         "Hierarchical 1", "Hierarchical 2", "Hierarchical 3",
                                         "CMC"))

# Fixes: theme() moved after theme_pander(); legend.position = "NA" is not a
# valid value (it happened to hide the legend only by accident) -> "none".
ggplot(architectures,
       aes(x=From, y=To)) +
  geom_tile(aes(fill=Link), col="white") +
  facet_wrap(~ Model, nrow=3) +
  ggtitle("Connectivity Matrix Representation\nof the Different Architectures ") +
  scale_fill_viridis_c(option="inferno", end = 0.8) +
  coord_equal(ratio = 1) +
  theme_pander() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none")
How well do they do? To compare them, we will use three different metrics. The first is the degree of Overlap, or the proportion of correctly predicted connections. The second is the Correlation between the vectors of observed and predicted connections. The third is the Z-score, or the standard deviations of the distance between the number of correctly predicted connections and the number expected by chance.
Here is a table with the corresponding metrics:
# Two-sided exact binomial p-value for x correct identifications out of the
# 25 possible connections, at chance p = 0.5. Vectorized over x so it can be
# applied to a column inside mutate().
mbinom <- function(x) {
  result <- binom.test(x, 25)
  result$p.value
}
mbinom <- Vectorize(mbinom)
# Score each theoretical architecture against the empirical one (archi):
# Overlap = proportion of the 25 connections predicted correctly, Z = normal
# quantile of the binomial tail for that overlap.
# NOTE(review): "Correlation" is actually the SQUARED correlation (R^2)
# between the predicted and observed link vectors — confirm the column label
# is intended. This also assumes architectures and archi share the same
# (To, From) row order within each Model — verify upstream.
model_comparisons <- architectures %>%
  group_by(Model) %>%
  summarise(Correlation = cor(Link, archi$Link) ** 2,
            Overlap = (25 - sum(abs(Link - archi$Link)))/25) %>%
  mutate(Z = qnorm(1-mbinom(Overlap * 25)))
# Render the comparison as an HTML table.
model_comparisons %>%
  xtable() %>%
  kable(digits=3) %>%
  kable_styling(bootstrap_options = c("hover", "striped"))
| Model | Correlation | Overlap | Z |
|---|---|---|---|
| Hub PFC | 0.510 | 0.84 | 3.118 |
| Hub Temporal | 0.001 | 0.52 | -Inf |
| Hub Procedural | 0.100 | 0.36 | 0.740 |
| Hierarchical 1 | 0.001 | 0.52 | -Inf |
| Hierarchical 2 | 0.040 | 0.60 | 0.191 |
| Hierarchical 3 | 0.040 | 0.60 | 0.191 |
| CMC | 0.599 | 0.88 | 3.604 |
and here is a visual representation of the performance of the seven architectures. As shown, the CMC outperforms all of the other competitors.
# Model names (factor order for plotting) and one color per model family:
# red = CMC, greens = hierarchical variants, blues = hub variants.
model_names <- c("CMC",
                 "Hierarchical 1",
                 "Hierarchical 2",
                 "Hierarchical 3",
                 "Hub PFC",
                 "Hub Procedural",
                 "Hub Temporal")

model_colors <- c("red3",
                  "palegreen4", "palegreen3", "palegreen2",
                  "deepskyblue4", "deepskyblue3", "deepskyblue2")

# Long format for faceting by measure; negative Z scores are clamped to 0 so
# the bars stay interpretable. (Replaced the right-assign `->` with `<-`.)
lmodel_comparisons <- model_comparisons %>%
  mutate(Z = if_else(Z > 0, Z, 0)) %>%
  pivot_longer(cols=c(Overlap, Correlation, Z),
               names_to = "Measure",
               values_to = "Value")

lmodel_comparisons$Model <- factor(lmodel_comparisons$Model,
                                   levels = model_names)

# Fixes: the two conflicting theme() calls (angle=90 then angle=45) are
# consolidated into their effective result (angle=45, hjust=1, since theme
# addition merges element properties); legend.position = "NA" is not a valid
# value -> "none".
ggplot(lmodel_comparisons, aes(x=Model, y= Value)) +
  geom_col(aes(fill=Model, col=Model), alpha=0.5) +
  scale_fill_manual(values = model_colors) +
  scale_color_manual(values = model_colors) +
  facet_wrap(~ Measure, scales = "free_y", nrow=3) +
  theme_pander() +
  ggtitle("Comparison of All Architectures") +
  geom_text(aes(label=format(round(Value, 2), nsmall = 2)),
            position = position_stack(vjust = 0.5)) +
  theme(axis.text.x = element_text(angle=45, hjust=1),
        legend.position = "none")